%load_ext autoreload
%autoreload 2
# import git
# import os
# import getpass
# token = getpass.getpass()
# git.Git('/nfs/team205/ny1/ThymusSpatialAtlas/ImageSpot/').clone('https://nadavyayon:'+token+'@github.com/nadavyayon/jupyter-annotator.git')
you can use my env but do not modify it!!!! conda activate /nfs/team205/ny1/miniconda/miniconda3/envs/imagespot
import os
import numpy as np
from bokeh.plotting import show, output_notebook
import pandas as pd
import socket
os.chdir('/nfs/team205/ny1/ThymusSpatialAtlas/ImageSpot/tissue-tag/')
import tissue_tag as tt
os.chdir('/nfs/team205/ny1/ThymusSpatialAtlas/ImageSpot/')
import visiumator as vm
import matplotlib.pyplot as plt
import scanpy as sc
output_notebook()
host = '5011'
plt.figure(figsize=[12,12])
vis_path = '/nfs/team205/ny1/Visium/ThymusAtlasDataSet/set160222/Align_and_detect_output/V11J11-099_Z4-THY-FO3_A1_TA11556493/'
im,ppm,df = tt.read_visium(SpaceRanger_dir_path=vis_path,use_resolution='hires',res_in_ppm=0.5)
coordinates = np.vstack((df[5],df[4]))
plt.imshow(im,origin='lower')
plt.plot(coordinates[0,:],coordinates[1,:],'.')
plt.title( 'ppm - '+str(ppm))
# adata = AnnVis[AnnVis.obs['in_tissue']==True]
<Figure size 864x864 with 0 Axes>
<matplotlib.image.AxesImage at 0x1480568e5d30>
[<matplotlib.lines.Line2D at 0x1480568f9250>]
Text(0.5, 1.0, 'ppm - 0.5')
# define annotations Lv0
annodict = {
'unassigned':'yellow',
'Artifacts':'black',
'Medulla':'green',
'Cortex':'cyan',
'Edge':'brown',
'HS':'magenta',
'Vessels':'blue',
'PVS':'red',
'Other':'orange',
}
# generate automated labels from markers
# these marker genes would be used to train the classifier in later stages or just to define the annotations if going with the manual option
gene_markers = {
'Medulla' : 'AIRE',
'Cortex' : 'ARPP21',
# 'HS' : 'IVL',
# 'Vessels' : 'CD34',
}
labels_per_marker = { # how many spots per marker gene
'Medulla' : 50,
'Cortex' : 150,
# 'HS' : 15,
# 'Vessels' : 15,
}
r = 100
adata = sc.read_visium(vis_path,count_file='raw_feature_bc_matrix.h5')
adata = adata[df.index.intersection(adata.obs.index)]
training_labels = tt.background_labels(im.shape[:2],coordinates,every_x_spots = 100,r=r*ppm) # generate background labels
training_labels_rgb = tt.rgb_from_labels(training_labels,colors=list(annodict.values()))
training_labels = tt.gene_labels(adata,df,training_labels,gene_markers,annodict,r*ppm,labels_per_marker)
training_labels_rgb = tt.rgb_from_labels(training_labels,colors=list(annodict.values()))
out = tt.overlay_lebels(im,training_labels_rgb,alpha=0.5)
AIRE ARPP21
p, render_dict = tt.scribbler(im,annodict,plot_scale=1)
def app(doc):
global p
doc.add_root(p)
import socket
show(app,notebook_url=f'{socket.gethostname()}:'+host) # determine if working on a server or local environment, if working on a local pc - 'local', while working on server instance indicate the port number e.g. 5011 (string)
print('wait for it.. can take a few tens of seconds on farm...')
wait for it.. can take a few tens of seconds on farm...
line_width = 100 # microns
training_labels_scrib = tt.scribble_to_labels(im,render_dict,line_width=line_width*ppm)
training_labels = training_labels + training_labels_scrib
training_labels_rgb = tt.rgb_from_labels(training_labels,colors=list(annodict.values()))
out = tt.overlay_lebels(im,training_labels_rgb,alpha=0.5)
unassigned Artifacts Medulla Cortex Edge HS Vessels PVS Other
%%time
# this step takes about 30s to 5 min depending on number of training areas and resolution
result = tt.sk_rf_classifier(im,training_labels)
# plot result
result_rgb = tt.rgb_from_labels(result,colors=list(annodict.values()))
out_img = tt.overlay_lebels(im,result_rgb,alpha=0.7)
CPU times: user 3min 9s, sys: 21.9 s, total: 3min 31s Wall time: 53.4 s
# at this stage, if you are adding annotations make sure they come after the current ones otherwise it will mess up everything
result_rgb = tt.rgb_from_labels(result,colors=list(annodict.values()))
out_img = tt.overlay_lebels(im,result_rgb,alpha=0.5,show=False)
tabs,render_dict = tt.annotator(im,out_img,anno_dict=annodict,fig_downsize_factor=4)
# plotting
def app(doc):
global tabs
doc.add_root(tabs)
show(app,notebook_url=f'{socket.gethostname()}:'+host)
print('wait for it.. can take a few tens of seconds on farm... (double time of the former plot)')
wait for it.. can take a few tens of seconds on farm... (double time of the former plot)
out_img, result = tt.update_annotator(im,result,anno_dict=annodict,render_dict=render_dict,alpha=0.5)
unassigned Cortex Edge Other
# plot result
result_rgb = tt.rgb_from_labels(result,colors=list(annodict.values()))
out_img = tt.overlay_lebels(im,result_rgb,alpha=0.5)
isExist = os.path.exists(vis_path+'tissue_annotations')
if not(isExist):
os.mkdir(vis_path+'/tissue_annotations/')
tt.save_annotation(
folder=vis_path+'/tissue_annotations/',
label_image=result,
file_name='annotations_level_0',
anno_names=list(annodict.keys()),
anno_colors=list(annodict.values()),
ppm = ppm,
)
# load tissue annotations from jupyter annotator
annotations_level_0,annotations_level_0_labels,ppm,annodict = tt.load_annotation(vis_path+'/tissue_annotations/','annotations_level_0',load_colors=True)
# annotations_level_0[np.where(annotations_level_0==9)] = 1
im,ppm,positions = tt.read_visium(SpaceRanger_dir_path=vis_path+'/',use_resolution='hires5K',res_in_ppm=ppm)
result_rgb = tt.rgb_from_labels(annotations_level_0,colors=list(annodict.values()))
out_img = tt.overlay_lebels(im,result_rgb,alpha=0.5)
print(np.unique(annotations_level_0))
loaded annotation image - annotations_level_0 size - (3957, 3886)
loaded annotations
{1: 'unassigned', 2: 'Artifacts', 3: 'Medulla', 4: 'Cortex', 5: 'Edge', 6: 'HS', 7: 'Vessels', 8: 'PVS', 9: 'Other'}
loaded ppm
{'ppm': 0.5}
loaded color annotations
{'unassigned': 'yellow', 'Artifacts': 'black', 'Medulla': 'green', 'Cortex': 'cyan', 'Edge': 'brown', 'HS': 'magenta', 'Vessels': 'blue', 'PVS': 'red', 'Other': 'orange'}
[1 2 3 4 5 6 7 8 9]
%%time
# it's critical not to split cell part and recalculate the df everytime you cahnge the dist2cluster resolution - I have an overflow bug somewhere that I need to fix
spot_diameter = 30 # don't go crazy here avoid under 15um spots
df = tt.grid_anno(
im,
[annotations_level_0],
['annotations_level_0'],
[annotations_level_0_labels],
spot_diameter=spot_diameter,
pixels_per_micron=ppm
)
df
generating grid with spot size - 30, with resolution of - 0.5 ppm annotations_level_0
68640it [00:09, 7331.22it/s]
CPU times: user 9.87 s, sys: 93 ms, total: 9.97 s Wall time: 9.92 s
| x | y | annotations_level_0 | annotations_level_0_number | |
|---|---|---|---|---|
| index | ||||
| 0 | 15.0 | 15.000000 | unassigned | 1 |
| 1 | 15.0 | 29.931559 | unassigned | 1 |
| 2 | 15.0 | 44.863117 | unassigned | 1 |
| 3 | 15.0 | 59.794678 | unassigned | 1 |
| 4 | 15.0 | 74.726234 | unassigned | 1 |
| ... | ... | ... | ... | ... |
| 68635 | 3871.0 | 3889.773682 | unassigned | 1 |
| 68636 | 3871.0 | 3904.705322 | unassigned | 1 |
| 68637 | 3871.0 | 3919.636963 | unassigned | 1 |
| 68638 | 3871.0 | 3934.568359 | unassigned | 1 |
| 68639 | 3871.0 | 3949.500000 | unassigned | 1 |
68640 rows × 4 columns
# remove background spots
df = df.iloc[np.where(df['annotations_level_0']!='unassigned')[0],:]
from scipy.spatial import distance_matrix, distance
a = np.vstack([df['x'],df['y']]).astype(int)
distM = distance.cdist(a.T,a.T, metric='euclidean')
print('calculating distances')
tt.dist2cluster(df,annotation='annotations_level_0',distM=distM,resolution=1) # calculate minimum median distance of each spot to clusters
tt.plot_grid(df,annotation='annotations_level_0',figsize=[10,10],spotsize=3)
calculating distances Artifacts Cortex Edge HS Medulla Other PVS Vessels
{'Artifacts': array([ 0. , 667.69004785, 682.45512673, ..., 30. ,
15. , 0. ]),
'Cortex': array([339.20200471, 44.55333882, 44.55333882, ..., 15. ,
16.55294536, 16.55294536]),
'Edge': array([310.19026419, 0. , 0. , ..., 0. ,
0. , 15. ]),
'HS': array([923.48741193, 835.88097239, 830.96329642, ..., 94.86832981,
91.24143795, 90. ]),
'Medulla': array([805.76175139, 664.80448254, 653.3215135 , ..., 30. ,
33.54101966, 42.42640687]),
'Other': array([940.39353464, 596.69925423, 598.01755827, ..., 426.39183857,
432.90299144, 439.82951242]),
'PVS': array([843.54312279, 824.08858747, 830.04638425, ..., 656.54017394,
653.46002173, 650.71114944]),
'Vessels': array([2561.13744262, 2078.71234181, 2071.26289978, ..., 1505.42751403,
1508.54797736, 1511.81083473])}
# calculate distance matrix between hires and visium spots
positions = tt.anno_to_visium_spots(
df_spots=positions,
df_grid=df,
)
positions
make sure the coordinate systems are alligned e.g. axes are not flipped
migrating - annotations_level_0 to segmentations migrating - annotations_level_0_number to segmentations migrating - L2_dist_annotations_level_0_Artifacts to segmentations migrating - L2_dist_annotations_level_0_Cortex to segmentations migrating - L2_dist_annotations_level_0_Edge to segmentations migrating - L2_dist_annotations_level_0_HS to segmentations migrating - L2_dist_annotations_level_0_Medulla to segmentations migrating - L2_dist_annotations_level_0_Other to segmentations migrating - L2_dist_annotations_level_0_PVS to segmentations migrating - L2_dist_annotations_level_0_Vessels to segmentations
| 1 | 2 | 3 | 4 | 5 | annotations_level_0 | annotations_level_0_number | L2_dist_annotations_level_0_Artifacts | L2_dist_annotations_level_0_Cortex | L2_dist_annotations_level_0_Edge | L2_dist_annotations_level_0_HS | L2_dist_annotations_level_0_Medulla | L2_dist_annotations_level_0_Other | L2_dist_annotations_level_0_PVS | L2_dist_annotations_level_0_Vessels | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | |||||||||||||||
| GTATTCTTACCGTGCT-1 | 1.0 | 7.0 | 21.0 | 600.611934 | 892.788159 | Edge | 5 | 139.014388 | 15.000000 | 0.00000 | 239.610100 | 224.200803 | 1023.989258 | 215.127869 | 2404.819536 |
| GCGGTAACCCAAATGA-1 | 1.0 | 7.0 | 23.0 | 600.715978 | 942.575622 | Cortex | 4 | 93.920179 | 0.000000 | 27.45906 | 210.011904 | 195.243438 | 1033.924562 | 211.002370 | 2389.599548 |
| CTACGTGTTGCCACCA-1 | 1.0 | 6.0 | 24.0 | 557.335251 | 967.497706 | Edge | 5 | 50.089919 | 15.000000 | 0.00000 | 227.589543 | 214.462118 | 1081.566457 | 254.000000 | 2415.968543 |
| CTAGATAAACTCCTCG-1 | 1.0 | 7.0 | 25.0 | 600.820021 | 992.363084 | Cortex | 4 | 42.426407 | 0.000000 | 45.00000 | 173.556907 | 160.589539 | 1040.721384 | 209.000000 | 2362.246600 |
| TCCATTAGTTGGATAG-1 | 1.0 | 6.0 | 26.0 | 557.439295 | 1017.285168 | Cortex | 4 | 16.552945 | 0.000000 | 15.00000 | 200.179919 | 186.324985 | 1079.529990 | 246.456893 | 2388.600636 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| CTTGATGACCATCCAG-1 | 1.0 | 70.0 | 98.0 | 3340.880769 | 2807.819225 | Edge | 5 | 60.000000 | 26.627054 | 0.00000 | 166.721324 | 79.849859 | 322.280933 | 148.600135 | 950.223658 |
| CTTGCCCACCCACGCA-1 | 1.0 | 70.0 | 100.0 | 3340.984813 | 2857.606687 | Cortex | 4 | 16.552945 | 0.000000 | 15.00000 | 157.714933 | 54.120237 | 355.584027 | 110.453610 | 980.041326 |
| GGACAACCATGAAGCC-1 | 1.0 | 70.0 | 102.0 | 3341.088857 | 2907.394150 | Artifacts | 2 | 0.000000 | 15.000000 | 15.00000 | 174.631040 | 54.120237 | 397.555028 | 93.605555 | 1019.580796 |
| AGGTGCACGTCCACAT-1 | 1.0 | 70.0 | 104.0 | 3341.192900 | 2957.181612 | Edge | 5 | 15.000000 | 26.627054 | 0.00000 | 202.625270 | 66.189123 | 449.449663 | 93.920179 | 1066.723019 |
| ACCAAGTCATCGGCAG-1 | 1.0 | 70.0 | 106.0 | 3341.296944 | 3006.969075 | Edge | 5 | 33.541020 | 27.459060 | 0.00000 | 227.035240 | 80.709355 | 472.128161 | 111.126055 | 1099.483515 |
2958 rows × 15 columns
import scanpy as sc
adata_vis = sc.read_h5ad(vis_path+'/Corrected_AnnData.h5ad')
adata_vis.obs = pd.concat([adata_vis.obs,positions.iloc[:,5:]],axis=1)
sc.pl.spatial(adata_vis,color=['annotations_level_0','L2_dist_annotations_level_0_Edge','L2_dist_annotations_level_0_Medulla','L2_dist_annotations_level_0_HS'],cmap='gist_rainbow',ncols=2)